{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "6CODJ2BDEdTN"
      },
      "source": [
        "\n",
        "\u003ca href=\"https://colab.research.google.com/github/google-research/bigbird/blob/master/bigbird/summarization/eval.ipynb\" target=\"_parent\"\u003e\u003cimg src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/\u003e\u003c/a\u003e"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "YEhLwgZwEWdw"
      },
      "source": [
        "##### Copyright 2020 The BigBird Authors\n",
        "\n",
        "Licensed under the Apache License, Version 2.0 (the \"License\");"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "BAEheXiPEUAF"
      },
      "outputs": [],
      "source": [
        "# Copyright 2020 The BigBird Authors. All Rights Reserved.\n",
        "#\n",
        "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
        "# you may not use this file except in compliance with the License.\n",
        "# You may obtain a copy of the License at\n",
        "#\n",
        "#     http://www.apache.org/licenses/LICENSE-2.0\n",
        "#\n",
        "# Unless required by applicable law or agreed to in writing, software\n",
        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
        "# See the License for the specific language governing permissions and\n",
        "# limitations under the License.\n",
        "# =============================================================================="
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "zMotUClMFbHj"
      },
      "outputs": [],
      "source": [
        "!pip install git+https://github.com/google-research/bigbird.git -q"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "qFqiLMrtELV-"
      },
      "outputs": [],
      "source": [
        "import tensorflow.compat.v2 as tf\n",
        "import tensorflow_datasets as tfds\n",
        "import tensorflow_text as tft\n",
        "from tqdm import tqdm\n",
        "\n",
        "tf.enable_v2_behavior()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "1W572kOSHHI2"
      },
      "source": [
        "## Load Saved Model"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "k4oqa8sHErHq"
      },
      "outputs": [],
      "source": [
        "path = 'gs://bigbird-transformer/summarization/pubmed/roberta/saved_model'\n",
        "imported_model = tf.saved_model.load(path, tags='serve')\n",
        "summerize = imported_model.signatures['serving_default']"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hfw8LCz8HJze"
      },
      "source": [
        "## Setup Data"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "dIwHE9w6FXre"
      },
      "outputs": [],
      "source": [
        "dataset = tfds.load('scientific_papers/pubmed', split='test', shuffle_files=False, as_supervised=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "00sn8AqmHxkk"
      },
      "outputs": [],
      "source": [
        "# inspect at a few examples\n",
        "for ex in dataset.take(3):\n",
        "  print(ex)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "A_fBEcJIHO0g"
      },
      "source": [
        "## Print predictions"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "W51RZ6ylHbE0"
      },
      "outputs": [],
      "source": [
        "predicted_summary = summerize(ex[0])['pred_sent'][0]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "executionInfo": {
          "elapsed": 35119,
          "status": "ok",
          "timestamp": 1607932440202,
          "user": {
            "displayName": "Manzil Zaheer",
            "photoUrl": "",
            "userId": "06259716656099187509"
          },
          "user_tz": 480
        },
        "id": "0ubEWsDqGFUq",
        "outputId": "dafe9305-bd33-4c8a-91b7-03fb2ec0f2b2"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Article:\n",
            " b'\\n hepatitis c virus ( hcv ) infection is reported to have a prevalence of approximately 3% worldwide .\\nmajority of patients with chronic hcv have a mild , asymptomatic elevation in serum transaminase levels with no significant clinical symptoms . around 25% of patients with chronic hcv have persistently normal alanine aminotransferase ( pnalt ) .\\ndefinition of normal alanine aminotransferase ( alt ) has changed over time and reference range for normal alt differs based on different laboratory cutoffs .\\nprati et al .   in 2002 suggested new cutoffs with 30  u / l ( international unit ) for men and 19  u / l for women compared to 40 \\nu / l and 30  u / l for men and women , respectively .\\na 2009 american association for the study of liver disease ( aasld ) practice guideline suggested an alt value of 40  u / l on 2 - 3 different occasions separated by at least a month over a period of 6 months .\\nothers have used 3 different alt levels equal to or below upper limit of normal ( uln ) separated by at least 1 month and sometimes over a period of 18 months .\\nit was generally thought that people with pnalt have a mild liver disease and the degree of liver fibrosis is minimal [ 614 ] . based on this ,\\nlater on , it was realized that a considerable number of such patients developed significant inflammation and fibrosis over time .\\nmore recently , treatment has been recommended along the same lines for patients with pnalt as patients with elevated alt .\\nalthough more data is becoming available about the relationship of liver enzymes and course of chronic hcv infection , data regarding hcv infection and pnalt is relatively scarce . because of variation in the definition of pnalt\\n, fewer studies have looked at the relationship of pnalt with chronic hcv infection using updated normal alt definitions .\\ndepartment of hepatology at the university of illinois ( u of i ) medical center , chicago , had a database of over 1200 patients with chronic hcv infection .\\nmedical records of these patients were reviewed in an effort to characterize patients with chronic hcv infection and pnalt .\\nhistological and clinical parameters for patients with pnalt as well as elevated alt were analyzed .\\ndatabase of patients with hcv infection presenting to u of i medical center , chicago , was reviewed .\\npatients with biopsy proven hcv infection and a detectable hcv ribonucleic acid ( rna ) in blood were chosen .\\nof these , patients with an alt at liver biopsy , at least one additional over the next 12 months , and liver biopsy slides available for review were identified .\\nmost of the liver biopsy procedures were done at u of i medical center and in cases where biopsies were done at outside facility they were read again at u of i medical center .\\ntwo expert hepatologists , who were masked to clinical data , assigned knodell et al .  \\nintervals for alt measurement were chosen around the time of liver biopsy as well as 3 , 6 , and 12 months after biopsy .\\npatients with end - stage renal disease like those on dialysis and stage iv chronic kidney disease with creatinine clearance of 1529 , those who received organ transplant , those with co - infection with hiv , those who were positive for hepatitis b surface antigen ( hbsag ) , and those receiving antiviral therapy for chronic hcv were excluded .\\npnalt was defined as alt  30  u / l on at least 2 different occasions over 12 months .\\nstrict pnalt was defined as alt  30  u / l for males and 19 \\nu / l for females . demographic data including age at biopsy , gender , and race were recorded .\\nclinical data included body mass index ( bmi ) , alcohol use , tobacco use , and presence of diabetes mellitus ( dm ) .\\nhcv virus was further characterized by recording hcv rna levels , genotype , and duration of infection .\\nhistological data included individual markers of inflammation like portal tract inflammation , piece meal necrosis , and lobular inflammation as well as fibrosis according to knodell et al . scoring system .\\ninflammatory score ( sum of portal tract inflammation , piece meal necrosis , and lobular inflammation ) and histologic activity index ( hai ) score ( sum of inflammatory score and fibrosis ) were calculated .\\nhistologic data from pnalt was then compared with patients from elevated alt group . finally , clinical characteristics of pnalt with advanced fibrosis were compared with pnalt but with no advanced fibrosis .\\nindependent sample t - test and chi - squared test were used to calculate p values where appropriate .\\na total of 243 patients out of a database of 1200 patients with hcv satisfied the study criteria .\\nmain reasons to exclude a large number of patients were a lack of detectable rna despite biopsy report , outside biopsy report but slides not available for review , single or no alt value , and patients undergoing treatments .\\nthose analyzed were further divided into pnalt , strict pnalt , and elevated alt group .\\n32 ( 13% ) of these patients were identified as pnalt group and 211 ( 87% ) were identified as elevated alt group . only 13 ( 5% ) patients satisfied criterion for strict pnalt and this group was not analyzed further .\\nthe range of alt values at different time intervals was specified ( table 1 ) .\\n24 ( 75% ) of pnalt patients were females while 85 ( 40% ) with elevated alt were females .\\n13 ( 41% ) with pnalt were african american ( aa ) compared to 87 ( 41% ) with elevated alt , 14 ( 44% ) were caucasian ( w ) compared to 79 ( 38% ) with elevated alt , and 5 ( 15% ) were hispanic ( h ) compared to 44 ( 21% ) with elevated alt .\\nthere was no statistically significant difference in the racial distribution between pnalt and elevated alt group .\\nthere was a higher frequency of women in the pnalt group compared to the elevated alt group ( p = 0.001 ) .\\ndiabetes and alcohol use were more common among patients with elevated alt compared to pnalt ( p = 0.04 and 0.049 , resp . ) .\\nmost notably , patients with pnalt had a higher rate of cirrhosis ( p = 0.007 ) .\\nthere were no differences in age at biopsy , tobacco use , bmi , rna level , and duration of infection between pnalt and elevated alt groups ( table 2 ) .\\nfurther evaluation of liver histology showed no statistically significant difference in mean fibrosis score , mean portal tract inflammation score , mean piecemeal necrosis score ( pmn ) , mean lobular inflammation score , mean histologic activity index ( hai ) score , and mean inflammatory score between pnalt group and elevated alt group ( table 3 ) .\\ncomparison of clinical characteristics of pnalt group with advanced fibrosis with pnalt group without advanced fibrosis showed that only platelet count was significantly different between the two groups ( table 4 ) .\\ntables 5 and 6 characterize the distribution of hcv genotypes based on pnalt and hai score , respectively .\\nthe natural history of chronic hcv infection with pnalt is poorly understood [ 1820 ] .\\nwe attempt to describe the characteristics of patients with pnalt , which constitutes almost 2530% of patients with chronic hcv infection .\\nfirstly , a high proportion of patients with pnalt had advanced fibrosis , and degree of inflammation was not significantly different than chronic hcv infection with abnormal alt .\\nsecondly , it was difficult to identify a substantially large set of patients with hcv infection and pnalt given that there is a significant fluctuation in the alt level over time [ 9 , 15 ] .\\nwe chose duration of 12 months to observe the levels of alt instead of 6 months period .\\nit is becoming clear that 6 months is probably too short given that in some cases alt level may fluctuate after initial period of stability [ 7 , 2124 ] .\\nmost patients with pnalt were females , which is consistent with earlier findings [ 79 ] .\\nsimilarly , age at biopsy , bmi , rna level , and duration of infection were not significantly different between the two groups .\\nhcv genotype distribution showed that a majority ( 81% ) of patients belonged to genotype 1 and it is a well - characterized fact .\\nthere was no significant difference in terms of distribution of genotypes between the 2 groups ( table 5 ) .\\nalso there was no significant difference in hai according to genotype distribution ( table 6 ) .\\nhcv genotyping was performed in 181/243 ( 75% ) patients and was missing in 62 ( 25% ) patients .\\nthe likely reason was transition from paper to electronic records in 1990s and loss of some data .\\nwithin pnalt , those with advanced fibrosis differed from those without advanced fibrosis by platelet count only .\\nsimilarly , pnalt patients were divided based on low - normal alt ( \u003c 19 ) and high - normal alt ( 2030 ) for comparing hai scores among them but no significance was seen ( table 7 ) .\\nstudies to date have been mentioning a milder disease for pnalt in terms of fibrosis and necroinflammation [ 79 , 2628 ] .\\nsome studies have pointed to this fact as well [ 14 , 29 , 30 ] .\\nthis is an interesting finding given that despite significant inflammation ( comparable to abnormal alt ) the alt levels in some of these patients have been consistently low .\\nsimilarly , advanced fibrosis was more common in pnalt group as compared to the elevated alt group ( p = 0.007 ) .\\nit is thought that alt levels normalize in patients with advanced fibrosis   and that is why some authors will advocate doing liver biopsy in patients with hcv infection and normal alt levels .\\nit is interesting to note that the 6 patients with pnalt who had cirrhosis also had evidence of thrombocytopenia .\\nour results indicate that platelet count can be used as a marker to predict fibrosis in patients with pnalt .\\nfor instance , almost all patients in the study group had an alt measured around biopsy but only slightly more than half had alt measured around 12 months .\\nsecond , sample size was relatively small and might not be a true representative of patients with pnalt .\\nthis might in particular be valid for pnalt with advanced fibrosis as 8 ( 25% ) out of 32 patients with pnalt had f3-f4 while only 19 ( 9% ) out of 211 patients with elevated alt had f3-f4 ( p = 0.007 ) .\\nit is not clear if the outcome would have been the same if denominator for pnalt was high .\\nsmall sample size was caused mainly as described before as well as comorbid conditions like advanced kidney disease , hiv , hbsag positive , and being on antiviral treatment .\\nfor example , 11 patients with pnalt were excluded as they had esrd ; alt levels are known to be lower in esrd [ 34 , 35 ] secondary to an impaired immune response in patients with esrd .\\nthis raises concern that those with pnalt and severe liver fibrosis may have been in biochemical remission .\\nfor example , of the 8 patients with severe liver fibrosis ( stages 3 and 4 ) and pnalt , only 2 patients had 4 alt measurements over 12 months ( over the period of 0 , 3 , 6 , and 12 months ) , while 3 patients had 3 alt measurements over 12 months , and the remaining 3 patients had only 2 alt measurements over the 12 months period .\\nthus , it is not possible to say with certainty that all patients with pnalt and severe liver damage had uniformly low alt all along .\\nin conclusion , histological changes observed in hcv patients with pnalt will argue that alt is not a reliable indicator of hepatic inflammation or fibrosis .\\nfemale gender , absence of dm , and abstinence from alcohol were associated with pnalt .\\nthese findings indicate the need for more studies with higher number of pnalt patients to look at the relationship of pnalt with changes occurring at histological and molecular levels .'\n",
            "\n",
            " Predicted summary:\n",
            " b' \\xe2\\x81\\x87  background .  \\xe2\\x81\\x87  chronic hepatitis c virus ( hcv ) infection with alanine aminotransferase ( pnalt ) is not a reliable indicator of hepatic inflammation or fibrosis .  \\xe2\\x81\\x87  fewer studies have looked at the relationship of pnalt with chronic hcv infection using updated normal alanine aminotransferase definitions .  \\xe2\\x81\\x87  patients and methods .  \\xe2\\x81\\x87  medical records of patients with chronic hcv infection were reviewed in an effort to characterize patients with chronic hcv infection and pnalt .  \\xe2\\x81\\x87  pnalt was defined as alt 30 u / l on at least two occasions over 12 months .  \\xe2\\x81\\x87  results .  \\xe2\\x81\\x87  a total of 243 patients with chronic hcv infection were included .  \\xe2\\x81\\x87  patients with pnalt were divided into pnalt , strict pnalt , and elevated alt group .  \\xe2\\x81\\x87  patients with pnalt were more likely to have advanced fibrosis ( p = 0.001 ) .  \\xe2\\x81\\x87  patients with pnalt had a higher rate of cirrhosis ( p = 0.007 ) .  \\xe2\\x81\\x87  patients with pnalt had a higher rate of cirrhosis ( p = 0.007 ) .  \\xe2\\x81\\x87  conclusion .  \\xe2\\x81\\x87  histological changes observed in patients with pnalt'\n",
            "\n",
            " Ground truth summary:\n",
            " b' patients with chronic hepatitis c virus ( hcv ) infection and persistently normal alanine aminotransferase ( pnalt ) are generally described to have mild liver disease . \\n the aim of this study was to compare clinical and histological features in hcv - infected patients with pnalt and elevated alt . \\n patients presenting to the university of illinois medical center , chicago , who had biopsy proven hcv , an alt measurement at the time of liver biopsy , at least one additional alt measurement over the next 12 months , and liver biopsy slides available for review were identified . \\n pnalt was defined as alt  30 on at least 2 different occasions over 12 months . \\n of 1200 patients with hcv , 243 met the study criteria . \\n 13% ( 32/243 ) of patients had pnalt while 87% ( 211/243 ) had elevated alt . \\n significantly more patients with pnalt had advanced fibrosis ( f3 and f4 ) compared to those with elevated alt ( p = 0.007 ) . \\n there was no significant difference in the histology activity index score as well as mean inflammatory score between the two groups . in conclusion , in a well - characterized cohort of patients at a tertiary medical center , pnalt did not distinguish patients with mild liver disease . '\n",
            "\n",
            "\n"
          ]
        }
      ],
      "source": [
        "print('Article:\\n {}\\n\\n Predicted summary:\\n {}\\n\\n Ground truth summary:\\n {}\\n\\n'.format(\n",
        "    ex[0].numpy(),\n",
        "    predicted_summary.numpy(),\n",
        "    ex[1].numpy()))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "WQx9-_u6IMWI"
      },
      "source": [
        "## Evaluate Rouge Scores"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "wb4wSbmfK11B"
      },
      "outputs": [],
      "source": [
        "from rouge_score import rouge_scorer\n",
        "from rouge_score import scoring"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "lnep7S6KIgRB"
      },
      "outputs": [],
      "source": [
        "scorer = rouge_scorer.RougeScorer([\"rouge1\", \"rouge2\", \"rougeLsum\"], use_stemmer=True)\n",
        "aggregator = scoring.BootstrapAggregator()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "0VeTZT34IPoR"
      },
      "outputs": [],
      "source": [
        "for ex in tqdm(dataset.take(100), position=0):\n",
        "  predicted_summary = summerize(ex[0])['pred_sent'][0]\n",
        "  score = scorer.score(ex[1].numpy().decode('utf-8'), predicted_summary.numpy().decode('utf-8'))\n",
        "  aggregator.add_scores(score)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "p2rKqHdOKrmv"
      },
      "outputs": [],
      "source": [
        "aggregator.aggregate()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "qoB--1fiLoj1"
      },
      "outputs": [],
      "source": [
        ""
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "collapsed_sections": [],
      "last_runtime": {},
      "name": "UseSavedModel.ipynb"
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
